import sys
import shutil
import os
import re

# Usage: <script> LIST_FILE OUTPUT_PREFIX
#
# LIST_FILE names one input file per line.  Each input file starts with a
# header line (read and discarded) followed by records of four lines each:
# a time line containing a YYYY-MM-DD date, a user line, a text line
# (optionally prefixed with "W<TAB>") and a fourth line that is ignored.
# Every usable text line is normalised, cut/padded to MAX_WORDS tokens and
# appended to the file "<OUTPUT_PREFIX> <date>.txt".
#
# The code below sticks to syntax that is valid on both Python 2 and 3.

MAX_WORDS = 50          # every output row holds exactly this many tokens
PAD_TOKEN = "NULL"      # filler for rows with fewer than MAX_WORDS words
LINES_PER_RECORD = 4    # time, user, text, ignored

DATE_RE = re.compile(r"\d\d\d\d-\d\d-\d\d")

# Any character outside this whitelist makes the whole text line unusable.
UNSUPPORTED_CHAR_RE = re.compile(r"[^-+=\w\s@_:;,.<>/(){}*&\^%$#!~'\"\[\]|?]")

RECORD_PREFIX_RE = re.compile(r"^W\t")

# Applied last: strips the punctuation left over after token substitution.
PUNCTUATION_RE = re.compile(r"[-=+`~!@#$%^&*()/,.:;'\"\[\]{}|<>?]")

# Ordered (pattern, replacement) pairs.  Order matters: URLs are replaced
# before digits and "@" handles, and the specific contractions "can't" /
# "won't" are expanded before the generic "n't".
SUBSTITUTIONS = [(re.compile(pattern), replacement) for pattern, replacement in (
    (r"http:[-\w/%\d.|+=_&$#@!~?:,]+\n", "HTTPADDR\n"),
    (r"http:[-\w/%\d.|+=_&$#@!~?,]+[\s()\"'<>\[\]*{}]", "HTTPADDR "),
    # The dot is escaped so that ordinary words such as "awww" followed by
    # more text are not mistaken for a web address.
    (r"www\.[-\w/%\d.|+=_&$#@!~?,]+[\s()\"'<>\[\]*{}]", "HTTPADDR "),
    (r"\d[\d,.]*", "NUMBER"),
    (r"@[\d\w]*", "USER"),
    (r"'nt", " not"),
    (r"he's", "he is"),
    (r"'ve", " have"),
    (r"'re", " are"),
    (r"'m", " am"),
    (r"'d", " would"),
    (r"'ll", " will"),
    (r"can't", "can not"),
    (r"won't", "will not"),
    (r"n't", " not"),
    (r"who's", "who is"),
    (r"what's", "what is"),
    (r"where's", "where is"),
    (r"when's", "when is"),
    (r"why's", "why is"),
    (r"that's", "that is"),
    (r"there's", "there is"),
    (r"it's", "it is"),
    (r"(:\))|(:-\))|(\(:)|(\(-:)|(=\))|(\(=)|(:d)|(:p)|(:-d)|(:-p)", "HAPPYFACE"),
    (r";\)", "WINKFACE"),
    (r"(:\()|(:-\()|(\):)|(\)-:)|(=\()|(\)=)", "SADFACE"),
    # The pattern consumes the delimiter after the laugh, so a space is put
    # back; otherwise "lol hello" would fuse into "LAUGHhello" and be
    # discarded by the mixed-case filter in tokenize_tweet().
    (r"((haha)|(hehe)|(lol))+[\s!.,-]", "LAUGH "),
)]


def normalize_text(line):
    """Return the normalised form of a raw text line, or None to skip it.

    The line is skipped when it contains a character outside the supported
    whitelist or when it is an empty post ("no post title").  Otherwise the
    "W<TAB>" prefix is removed, the text is lower-cased, URLs, numbers,
    user handles, emoticons and laughs are replaced by upper-case tokens,
    common contractions are expanded and remaining punctuation is removed.
    """
    if UNSUPPORTED_CHAR_RE.search(line) is not None:
        return None
    text = RECORD_PREFIX_RE.sub("", line).lower()
    if "no post title" in text:
        return None
    for pattern, replacement in SUBSTITUTIONS:
        text = pattern.sub(replacement, text)
    return PUNCTUATION_RE.sub("", text)


def tokenize_tweet(line):
    """Turn a raw text line into exactly MAX_WORDS tokens.

    Returns a list of MAX_WORDS strings (the first words of the normalised
    text, padded with PAD_TOKEN), or None when the line is skipped or no
    word survives filtering.
    """
    text = normalize_text(line)
    if text is None:
        return None
    # Upper-case tokens are inserted into lower-cased text, so a word that
    # is neither all-upper nor all-lower has a token glued into the middle
    # of it (or has no letters at all) and is dropped.
    words = [word for word in text.split() if word.isupper() or word.islower()]
    words = words[:MAX_WORDS]
    if not words:
        return None
    return words + [PAD_TOKEN] * (MAX_WORDS - len(words))


def process_file(input_path, output_prefix):
    """Append the tokenised tweets of one input file to per-date outputs.

    Raises ValueError when a time line does not contain a YYYY-MM-DD date.
    """
    out = None
    open_date = ""
    record_date = None
    try:
        with open(input_path) as source:
            next(source, None)  # header line; its content is not used
            for index, line in enumerate(source):
                field = index % LINES_PER_RECORD
                if field == 0:
                    match = DATE_RE.search(line)
                    if match is None:
                        raise ValueError(
                            "%s: expected a YYYY-MM-DD date in time line %r"
                            % (input_path, line))
                    record_date = match.group()
                elif field == 2:
                    tokens = tokenize_tweet(line)
                    if tokens is None:
                        continue
                    # Switch output file whenever the date changes.
                    if out is None or record_date != open_date:
                        print(record_date)
                        if out is not None:
                            out.close()
                        open_date = record_date
                        out = open(output_prefix + " " + record_date + ".txt", "a")
                    out.write(" ".join(tokens) + "\n")
    finally:
        if out is not None:
            out.close()


def main(argv):
    """Process every file named in argv[1], writing under prefix argv[2]."""
    if len(argv) < 3:
        sys.stderr.write("usage: %s LIST_FILE OUTPUT_PREFIX\n" % argv[0])
        return 2
    list_path, output_prefix = argv[1], argv[2]
    with open(list_path) as listing:
        for entry in listing:
            input_path = entry.strip()
            if not input_path:
                continue  # tolerate blank lines in the list file
            process_file(input_path, output_prefix)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
